#Part 1 The purpose of this notebook is to determine the selective benefit of glucosinolate and flavonoid compounds through plant competition by creating selection gradients. Selection gradients will involve final body mass as the proxy for fitness, but this will be replaced with fitness once the measurement is in. Concentration will be on the x-axis. This analysis will account for family and greenhouse location.

#Part 2 The second purpose is to determine whether glucosinolates and flavonoids influence susceptibility to pathogens, and whether any change in susceptibility translates into increased fitness.

#Read in and prep data

# Packages used throughout the notebook.
library(ggplot2)
library(lme4)
library(tidyr) # provides drop_na(), which the summaries further down call
library(cowplot)
library(grid)
library(gridExtra)
library(dplyr)

# Project plotting helpers (presumably where theme_simple() and friends live).
source("GGPlot_Themes.R")

# Read in data.
# The former rm(list=ls()) call was removed: it ran after source() above and
# therefore erased everything GGPlot_Themes.R had just defined. Restart the R
# session when a clean workspace is needed.
dat <- read.csv("DataSynthesis.csv")

# Remove maple controls data from the data set.
dat <- dat %>%
  filter(treatment != "mcnt") %>%
  droplevels()

# Assign family column: first strip the pipe-delimited pieces of Tag, then drop
# everything from the first "-" onwards.
# NOTE(review): the first pattern begins with a bare "*" (glob style); check
# that it yields the intended Family values.
prefamily <- gsub("*.\\|", "", dat$Tag)
dat$Family <- gsub("\\-.*", "", prefamily)

# Remove those with fertilizer treatment, and extra genotypes that are only in
# the alone treatment (rows whose Sample contains a literal "i").
dat <- dat[!grepl("i", dat$Sample, fixed = TRUE), ]

# Initializing columns to avoid constant error messages.
dat$WhiteFungLogis <- NA
dat$BlackFungLogis <- NA

#Generating data frames with summarized leaf data (dat2) and summarized genotype data (dat3)


# Log-transform the leaf-level variables in place. Variables logged as
# log(x + 1) are shifted first so that zero values stay finite.
dat$Fern<-log(dat$Fern+1)
dat$gluc_Conc<-log(dat$gluc_Conc)
dat$flav_Conc<-log(dat$flav_Conc)
# NOTE(review): the "NaNs produced" message recorded on the next line suggests
# that ChlorA contains negative values - confirm and decide how to handle them.
dat$ChlorA<-log(dat$ChlorA)
NaNs produced
dat$ThripsDam<-log(dat$ThripsDam+1)
dat$BlackPathDam<-log(dat$BlackPathDam+1)


# Leaf area is computed as leaf length times leaf width.
dat$GM_Leaf_Area<-dat$GM_Leaf_Len*dat$GM_Leaf_Wid


# Plant-level table (dat2): one row per Tag. Leaf measurements are averaged;
# plant-level attributes are taken from the first leaf row of each plant.
dat2 <- dat %>%
  group_by(Tag) %>%
  summarize(
    ChlorA = mean(ChlorA),
    ChlorB = mean(ChlorB),
    gluc_Conc = mean(gluc_Conc),
    flav_Conc = mean(flav_Conc),
    Family = first(Family),
    treatment = first(treatment),
    gh_row = first(gh_row),
    gh_bench = first(gh_bench),
    GM_TotalLeaf_Area = first(GM_TotalLeaf_Area),
    comp_number = first(comp_number),
    ThripsDam = mean(ThripsDam),
    WhiteFungDam = mean(WhiteFungDam),
    BlackPathDam = mean(BlackPathDam),
    Fern = mean(Fern),
    gh_col = first(gh_col),
    GM_Leaf_Area = mean(GM_Leaf_Area)
  )

# Plants without a final leaf-area measurement. Per the original note these 15
# genotypes died (about 3% mortality), except e|JBCHY1-1-50|Q|240, for which
# only the final measurement is missing.
dead <- dat2 %>%
  filter(is.na(GM_TotalLeaf_Area))

# Competitor numbers of the dead garlic mustard plants (the plant with the
# merely missing measurement is excluded from this list).
dead_competitors <- dead %>%
  filter(treatment == "gm", Tag != "e|JBCHY1-1-50|Q|240") %>%
  select(comp_number)

# Drop every plant and leaf row that shares a competitor number with a dead
# competitor.
dat2 <- dat2 %>%
  filter(!comp_number %in% dead_competitors$comp_number)
dat <- dat %>%
  filter(!comp_number %in% dead_competitors$comp_number)


# Family-level table (dat3): mean of every numeric column per family within
# each treatment, using only plants with a final leaf-area measurement.
dat3 <- dat2 %>%
  drop_na(GM_TotalLeaf_Area) %>%
  group_by(Family, treatment) %>%
  summarize_if(is.numeric, mean)



# White pathogen damage is heavily zero inflated, so it is recoded as
# absence (0) / presence (1) for use in a logistic regression.
dat2$WhiteFungLogis <- NA
dat2$WhiteFungLogis[dat2$WhiteFungDam == 0] <- 0
dat2$WhiteFungLogis[dat2$WhiteFungDam > 0] <- 1

# Searching for a fitness trade-off between the alone and interspecific
# competition (maple) treatments.

source("GGPlot_Themes.R")

# Family means within treatment, placed side by side: one row per family with
# the alone and the maple values in separate columns.
TradeOff <- dat3 %>%
  filter(treatment == "a") %>%
  drop_na(GM_TotalLeaf_Area) %>%
  select(LeafSizeAlone = GM_TotalLeaf_Area, Family, gluc_ConcAlone = gluc_Conc) %>%
  right_join(
    dat3 %>%
      filter(treatment == "m") %>%
      select(LeafSizeMaple = GM_TotalLeaf_Area, Family, gluc_ConcMaple = gluc_Conc),
    by = "Family"
  )

# Standard error of total leaf area (and sample size) per family and treatment.
StdErr <- dat2 %>%
  select(Family, treatment, GM_TotalLeaf_Area) %>%
  group_by(Family, treatment) %>%
  drop_na(GM_TotalLeaf_Area) %>%
  summarize(
    StdErr = sd(GM_TotalLeaf_Area) / sqrt(length(GM_TotalLeaf_Area)),
    size = length(GM_TotalLeaf_Area)
  )

# Same side-by-side (wide) layout for the standard errors.
StdErr2 <- StdErr %>%
  filter(treatment == "a") %>%
  select(StdErrAlone = StdErr, Family) %>%
  right_join(
    StdErr %>%
      filter(treatment == "m") %>%
      select(StdErrMaple = StdErr, Family),
    by = "Family"
  )

# Means and standard errors together; joined on the shared Family column.
TradeOff2 <- StdErr2 %>%
  left_join(TradeOff)
Joining, by = "Family"
#tiff("Selection_Figures/TradeOff.tiff", units="in", width=10, height=6, res=300)

# Family-mean performance alone against performance with maple; the bars show
# one standard error in each direction.
ggplot(TradeOff2, aes(x = LeafSizeMaple, y = LeafSizeAlone)) +
  geom_point() +
  geom_linerange(
    aes(
      ymin = LeafSizeAlone - StdErrAlone,
      ymax = LeafSizeAlone + StdErrAlone
    )
  ) +
  geom_errorbarh(
    aes(
      xmin = LeafSizeMaple - StdErrMaple,
      xmax = LeafSizeMaple + StdErrMaple
    )
  ) +
  theme_simple() +
  labs(x = "Performance with Maple", y = "Performance Alone")

#dev.off()
# Linear regression of the alone family means on the maple family means.
summary(lm(LeafSizeAlone ~ LeafSizeMaple, data = TradeOff2))

Call:
lm(formula = LeafSizeAlone ~ LeafSizeMaple, data = TradeOff2)

Residuals:
    Min      1Q  Median      3Q     Max 
-2019.5  -657.0  -265.5   907.7  2364.0 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   9590.03571 1616.26626   5.933 6.87e-06 ***
LeafSizeMaple   -0.00936    0.18258  -0.051     0.96    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1244 on 21 degrees of freedom
Multiple R-squared:  0.0001251, Adjusted R-squared:  -0.04749 
F-statistic: 0.002628 on 1 and 21 DF,  p-value: 0.9596

# Visualizing genetic variation and greenhouse variation, which will be
# controlled for.

# Glucosinolate concentration by greenhouse bench.
ggplot(dat2) +
  geom_point(aes(x = gh_bench, y = gluc_Conc, colour = as.factor(gh_bench)))


# Glucosinolate concentration by greenhouse column, coloured by bench.
ggplot(dat2) +
  geom_point(aes(x = gh_col, y = gluc_Conc, colour = as.factor(gh_bench)))


# Family differences within each treatment, drawn in the order a, m, gm.
# Glucosinolate concentration:
invisible(lapply(c("a", "m", "gm"), function(trt) {
  boxplot(gluc_Conc ~ Family, data = dat2[dat2$treatment == trt, ])
}))

# Body mass proxy (total leaf area):
invisible(lapply(c("a", "m", "gm"), function(trt) {
  boxplot(GM_TotalLeaf_Area ~ Family, data = dat2[dat2$treatment == trt, ])
}))

#What influences performance? New model without gluc flav

summary(fit6) #This is a much cleaner result than using three different models for each treatment. I think instead i will use this model to assess pathogen damage. and i will determine the effect that the size of the maple has in the maple analysis. 
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
GM_TotalLeaf_Area ~ treatment + BlackPathDam + WhiteFungLogis +  
    ThripsDam + GM_Leaf_Area + (1 | gh_bench) + (1 | Family)
   Data: dat2

REML criterion at convergence: 7984.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.3919 -0.6053 -0.0053  0.5619  3.8123 

Random effects:
 Groups   Name        Variance Std.Dev.
 Family   (Intercept)  337263   580.7  
 gh_bench (Intercept)  395515   628.9  
 Residual             5196413  2279.6  
Number of obs: 440, groups:  Family, 23; gh_bench, 5

Fixed effects:
               Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)     9733.05     407.19     7.61  23.903 1.95e-08 ***
treatmentgm    -2307.76     313.00   421.17  -7.373 8.92e-13 ***
treatmentm     -1123.98     270.07   412.01  -4.162 3.85e-05 ***
BlackPathDam    -955.83     172.11   427.56  -5.554 4.93e-08 ***
WhiteFungLogis  -609.25     256.84   428.46  -2.372   0.0181 *  
ThripsDam       -281.51     151.94   430.96  -1.853   0.0646 .  
GM_Leaf_Area    1744.43     139.88   431.25  12.471  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg trtmntm BlckPD WhtFnL ThrpsD
treatmentgm -0.277                                     
treatmentm  -0.353  0.447                              
BlackPathDm -0.184 -0.037   0.025                      
WhiteFngLgs -0.200  0.054   0.119  -0.022              
ThripsDam   -0.253 -0.237  -0.032   0.021 -0.012       
GM_Leaf_Are -0.035  0.457   0.042  -0.010 -0.043 -0.288

#Model Diagnostics.

#Is there a cost to glucosinolate production?

summary(fit6) #As are flavonoids. 
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
GM_TotalLeaf_Area ~ treatment + BlackPathDam + WhiteFungLogis +  
    ThripsDam + GM_Leaf_Area + flav_Conc + (1 | Family) + (1 |  
    gh_bench)
   Data: dat2

REML criterion at convergence: 6273.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.4264 -0.5954  0.0449  0.6186  3.5043 

Random effects:
 Groups   Name        Variance Std.Dev.
 Family   (Intercept)  268511   518.2  
 gh_bench (Intercept)  561259   749.2  
 Residual             5418151  2327.7  
Number of obs: 347, groups:  Family, 23; gh_bench, 5

Fixed effects:
                Estimate Std. Error        df t value Pr(>|t|)    
(Intercept)    10014.241    476.587     7.425  21.012 6.93e-08 ***
treatmentgm    -1953.425    374.641   332.928  -5.214 3.25e-07 ***
treatmentm      -972.699    313.930   324.675  -3.098 0.002115 ** 
BlackPathDam    -870.632    197.491   335.056  -4.408 1.40e-05 ***
WhiteFungLogis  -585.477    293.544   332.437  -1.995 0.046912 *  
ThripsDam       -232.612    173.556   335.843  -1.340 0.181064    
GM_Leaf_Area    1830.260    157.673   338.907  11.608  < 2e-16 ***
flav_Conc       2786.414    744.882   338.626   3.741 0.000215 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg trtmntm BlckPD WhtFnL ThrpsD GM_L_A
treatmentgm -0.223                                            
treatmentm  -0.325  0.454                                     
BlackPathDm -0.169 -0.050   0.049                             
WhiteFngLgs -0.188  0.048   0.134  -0.046                     
ThripsDam   -0.225 -0.204  -0.001   0.063 -0.014              
GM_Leaf_Are -0.011  0.470   0.065  -0.034 -0.041 -0.301       
flav_Conc    0.158  0.210   0.151   0.088  0.035  0.162  0.087

#Visualization – The conditional benefit of glucosinolates (allelopathy)


# Fitted line for the alone treatment: predicted performance as a linear
# function of glucosinolate concentration x (vectorised over x).
# NOTE(review): slope and intercept are hard-coded and the model they were
# copied from is not shown nearby - confirm they are current.
aloneSlope <- function(x) {
  13751.96 + (-3966.60) * x
}
# Fitted line for the maple treatment: the slope is the sum of two hard-coded
# coefficients and the intercept is shifted by the maple offset.
# NOTE(review): the base slope used here (-4254.2) differs from the one in
# aloneSlope (-3966.60) - confirm both come from the same model fit.
mapleSlope <- function(x) {
  maple_slope <- -4254.2 + 8417.95
  maple_slope * x + 13751.96 - 9243.71
}

# Fitted "line" for the garlic mustard treatment. The slope was not
# significant, so the prediction is a constant: x is accepted for parity with
# the other slope helpers but is not used, and the result always has length 1.
mustardSlope <- function(x) {
  13751.96 - 8376.67
}

# Observed range of glucosinolate concentration within each treatment; each
# fitted line is drawn only across the range of its own treatment.
minM <- min(dat2$gluc_Conc[dat2$treatment == "m"], na.rm = TRUE)
maxM <- max(dat2$gluc_Conc[dat2$treatment == "m"], na.rm = TRUE)

minA <- min(dat2$gluc_Conc[dat2$treatment == "a"], na.rm = TRUE)
maxA <- max(dat2$gluc_Conc[dat2$treatment == "a"], na.rm = TRUE)

minG <- min(dat2$gluc_Conc[dat2$treatment == "gm"], na.rm = TRUE)
maxG <- max(dat2$gluc_Conc[dat2$treatment == "gm"], na.rm = TRUE)

#tiff("Selection_Figures/Gluc_Benefit.tiff", units="in", width=10, height=6, res=300)
library(ggplot2)
# Performance against glucosinolate concentration, coloured by treatment.
# The fitted lines are added with annotate() so that each is drawn exactly
# once; geom_segment() with constant coordinates repeats the same segment for
# every row of dat2.
ggplot(dat2) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = gluc_Conc, colour = treatment), size = 2) +
  annotate("segment",
    x = minA, xend = maxA, y = aloneSlope(minA), yend = aloneSlope(maxA),
    colour = "#009E73", size = 1.5
  ) +
  annotate("segment",
    x = minM, xend = maxM, y = mapleSlope(minM), yend = mapleSlope(maxM),
    colour = "#E69F00", size = 1.5
  ) +
  annotate("segment",
    x = minG, xend = maxG, y = mustardSlope(minG), yend = mustardSlope(maxG),
    colour = "#56B4E9", size = 1.5
  ) +
  theme_simple() +
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")"))) +
  xlab(bquote(bold("[Total Glucosinolate] " (mg/ml)))) +
  scale_colour_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  )

#dev.off()

minA
[1] 0.7713421

#Visualizing the effect of flavonoid on fitness.

summary(lmer(GM_TotalLeaf_Area~treatment+flav_Conc+BlackPathDam+Fern+(1|Family)+(1|gh_bench/gh_col), data=dat2))
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: GM_TotalLeaf_Area ~ treatment + flav_Conc + BlackPathDam + Fern +  
    (1 | Family) + (1 | gh_bench/gh_col)
   Data: dat2

REML criterion at convergence: 6473.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.1990 -0.6278  0.0560  0.6004  2.6495 

Random effects:
 Groups          Name        Variance Std.Dev.
 gh_col:gh_bench (Intercept)  660632   812.8  
 Family          (Intercept)  210327   458.6  
 gh_bench        (Intercept) 1767108  1329.3  
 Residual                    7165525  2676.8  
Number of obs: 350, groups:  gh_col:gh_bench, 28; Family, 23; gh_bench, 5

Fixed effects:
             Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)   7682.44    1156.80    33.30   6.641 1.42e-07 ***
treatmentgm  -3897.18     385.80   333.76 -10.102  < 2e-16 ***
treatmentm   -1133.35     364.02   330.79  -3.113  0.00201 ** 
flav_Conc     2471.19    1040.31   341.75   2.375  0.01808 *  
BlackPathDam   -77.69      30.19   335.79  -2.573  0.01050 *  
Fern          -125.75      57.56   330.33  -2.185  0.02962 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg trtmntm flv_Cn BlckPD
treatmentgm -0.294                              
treatmentm  -0.260  0.475                       
flav_Conc   -0.795  0.198   0.137               
BlackPathDm -0.163 -0.043   0.021   0.090       
Fern        -0.035 -0.037  -0.073   0.030 -0.156
# Fitted line for the alone treatment: predicted performance as a linear
# function of flavonoid concentration x (vectorised over x).
# NOTE(review): the hard-coded slope (2024.62) and intercept (8035.30) do not
# equal the flav_Conc model printed just above (2471.19 and 7682.44) - confirm
# which fit these values belong to.
aloneSlope <- function(x) {
  8035.30 + 2024.62 * x
}
# Fitted line for the maple treatment: same flavonoid slope as the alone line,
# with the intercept lowered by the hard-coded maple offset (1067.02).
mapleSlope <- function(x) {
  flav_slope <- 2024.62
  flav_slope * x + 8035.30 - 1067.02
}

# Fitted line for the garlic mustard treatment: same flavonoid slope as the
# alone line, with the intercept lowered by the hard-coded garlic mustard
# offset (3746.91). Unlike the glucosinolate version, this line does depend
# on x.
mustardSlope <- function(x) {
  flav_slope <- 2024.62
  flav_slope * x + 8035.30 - 3746.91
}

# Observed range of flavonoid concentration within each treatment; each fitted
# line is drawn only across the range of its own treatment.
minM <- min(dat2$flav_Conc[dat2$treatment == "m"], na.rm = TRUE)
maxM <- max(dat2$flav_Conc[dat2$treatment == "m"], na.rm = TRUE)

minA <- min(dat2$flav_Conc[dat2$treatment == "a"], na.rm = TRUE)
maxA <- max(dat2$flav_Conc[dat2$treatment == "a"], na.rm = TRUE)

minG <- min(dat2$flav_Conc[dat2$treatment == "gm"], na.rm = TRUE)
maxG <- max(dat2$flav_Conc[dat2$treatment == "gm"], na.rm = TRUE)


#tiff("Selection_Figures/Flav_Benefit.tiff", units="in", width=10, height=6, res=300)
# Performance against flavonoid concentration, coloured by treatment.
# The fitted lines are added with annotate() so that each is drawn exactly
# once; geom_segment() with constant coordinates repeats the same segment for
# every row of dat2. theme_simple() is applied once (it was added twice).
ggplot(dat2) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = flav_Conc, colour = treatment), size = 2) +
  annotate("segment",
    x = minA, xend = maxA, y = aloneSlope(minA), yend = aloneSlope(maxA),
    colour = "#009E73", size = 1.5
  ) +
  annotate("segment",
    x = minM, xend = maxM, y = mapleSlope(minM), yend = mapleSlope(maxM),
    colour = "#E69F00", size = 1.5
  ) +
  annotate("segment",
    x = minG, xend = maxG, y = mustardSlope(minG), yend = mustardSlope(maxG),
    colour = "#56B4E9", size = 1.5
  ) +
  theme_simple() +
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")"))) +
  xlab(bquote(bold("[Total Flavonoid] " (mg/ml)))) +
  scale_colour_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  )

#dev.off()

#Visualization– The Detriment of pathogens and ferns

summary(lmer(GM_TotalLeaf_Area ~ Fern+
(1 | Family) + (1 | gh_bench/gh_col) ,data=dat2))
boundary (singular) fit: see ?isSingular
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: GM_TotalLeaf_Area ~ Fern + (1 | Family) + (1 | gh_bench/gh_col)
   Data: dat2

REML criterion at convergence: 9607.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.85397 -0.70508  0.04291  0.74619  3.11787 

Random effects:
 Groups          Name        Variance Std.Dev.
 gh_col:gh_bench (Intercept)    96624  310.8  
 Family          (Intercept)        0    0.0  
 gh_bench        (Intercept)  2947085 1716.7  
 Residual                    10094168 3177.1  
Number of obs: 507, groups:  gh_col:gh_bench, 28; Family, 23; gh_bench, 5

Fixed effects:
            Estimate Std. Error       df t value Pr(>|t|)    
(Intercept) 7896.911    807.136    4.071   9.784 0.000561 ***
Fern        -226.020     56.097  432.082  -4.029 6.61e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
     (Intr)
Fern -0.055
convergence code: 0
boundary (singular) fit: see ?isSingular
# Scatterplot panels of performance (GM_TotalLeaf_Area) against each damage or
# competitor variable, stored for the grid layouts assembled afterwards.
# NOTE(review): the geom_abline() intercepts and slopes are hard-coded and do
# not equal the Fern model printed above (7896.911, -226.020) - confirm which
# fits they come from.

# Panel a: black pathogen damage, with a fitted line.
a <- ggplot(dat2) +
  geom_point(aes(x = BlackPathDam, y = GM_TotalLeaf_Area)) +
  geom_abline(slope = -105.28, intercept = 8281.76, size = 1.5) +
  theme_simple_multiCol() +
  xlab("Black Pathogen Damage")

# Panel b: powdery mildew damage, restricted to rows with WhiteFungDam < 30.
b <- ggplot(dat2[dat2$WhiteFungDam < 30, ]) +
  geom_point(aes(x = WhiteFungDam, y = GM_TotalLeaf_Area), colour = "#999999") +
  theme_simple_multiCol() +
  theme(
    axis.title.x = element_text(
      color = "#999999", size = 16, face = "bold", margin = margin(3, 0, 3, 0)
    )
  ) +
  xlab("Powdery Mildew Damage")

# Panel c: thrips damage.
c <- ggplot(dat2) +
  geom_point(aes(x = ThripsDam, y = GM_TotalLeaf_Area), colour = "#E69F00") +
  theme_simple_multiCol() +
  theme(
    axis.title.x = element_text(
      color = "#E69F00", size = 16, face = "bold", margin = margin(3, 0, 3, 0)
    )
  ) +
  xlab("Thrips Damage")

# Panel d: fern abundance, with a fitted line.
d <- ggplot(dat2) +
  geom_point(aes(x = Fern, y = GM_TotalLeaf_Area), colour = "#009E73") +
  geom_abline(slope = -179.47, intercept = 8003.44, size = 1.5, color = "#009E73") +
  theme_simple_multiCol() +
  theme(
    axis.title.x = element_text(
      color = "#009E73", size = 16, face = "bold", margin = margin(3, 0, 3, 0)
    )
  ) +
  xlab("Fern Abundance")
   


# Panel layouts built from the scatterplots a, b, c and d. The "Removed ... rows"
# lines are the recorded console messages from geom_point dropping missing values.

# Black pathogen and powdery mildew panels side by side.
plot<-plot_grid(a, b,ncol=2,rel_widths = c(1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
# Fern and thrips panels side by side.
plot2<-plot_grid(d, c,ncol=2,rel_widths = c(1,1))
Removed 9 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
# All four panels in a two-column grid.
plot3<-plot_grid(a,b,c,d,ncol=2,rel_widths = c(1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 9 rows containing missing values (geom_point).
# Panels a, b and c stacked in a single column.
# NOTE(review): with ncol=1 the panels are stacked, so rel_heights rather than
# rel_widths was presumably intended here - confirm.
plot4<-plot_grid(a,b,c,ncol=1,rel_widths = c(1,1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
# Panels a, b and c in a single row.
plot5<-plot_grid(a,b,c,ncol=3,rel_widths = c(1,1,1))
Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).Removed 61 rows containing missing values (geom_point).
# Shared, rotated y-axis label placed to the left of every arranged figure below.
y.grob<-textGrob(bquote(bold("Shoot Area "(mm^2))),gp=gpar(fontface="bold",fontsize=20),rot=90)


# Each figure below is drawn with the shared y-axis label; uncomment the
# matching tiff()/dev.off() pair to write the figure to file instead.
#tiff("Selection_Figures/PathogenEffect.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot,left=y.grob)

#dev.off()

#tiff("Selection_Figures/PathogenEffect2.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot2,left=y.grob)

#dev.off()

#tiff("Selection_Figures/PathogenEffect3.tiff", units="in", width=14, height=10, res=300)
grid.arrange(plot3,left=y.grob)

#dev.off()

# The stacked layout has no tiff() export line.
grid.arrange(plot4,left=y.grob)



#tiff("Selection_Figures/PathogenEffect5.tiff", units="in", width=16, height=5, res=300)
grid.arrange(plot5,left=y.grob)

#dev.off()

#tiff("Selection_Figures/FernEffect.tiff", units="in", width=8, height=6, res=300)
grid.arrange(d,left=y.grob)

#dev.off()

#—————————————– #Part 2

#Influence of gluc and flav on defence.

hist(dat$Fern)
Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 
3: Unknown or uninitialised column: `WhiteFungLogis`. 
4: Unknown or uninitialised column: `WhiteFungLogis`. 
5: Unknown or uninitialised column: `WhiteFungLogis`. 
6: Unknown or uninitialised column: `WhiteFungLogis`. 
7: Unknown or uninitialised column: `WhiteFungLogis`. 
8: Unknown or uninitialised column: `WhiteFungLogis`. 
9: Unknown or uninitialised column: `WhiteFungLogis`. 
10: Unknown or uninitialised column: `WhiteFungLogis`. 

#Modelling : Negative binomial on Thrips Damage.

summary(fit_g)$coef
$cond
             Estimate Std. Error   z value     Pr(>|z|)
(Intercept)  3.585466  0.5478174  6.545002 5.949477e-11
gluc_Conc   -2.793644  0.5756398 -4.853111 1.215394e-06

$zi
NULL

$disp
NULL

#Visualize: Thrips Damage predicted by glucosinolates and flavonoids


source("GGPlot_Themes.R")
Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 
3: Unknown or uninitialised column: `WhiteFungLogis`. 
4: Unknown or uninitialised column: `WhiteFungLogis`. 
#Reversing link function, to estimate the data on the response scale. 
# Predicted thrips damage on the response scale as a function of flavonoid
# concentration x: the linear predictor is passed through exp(), the inverse
# of a log link. Coefficients are hard-coded.
flavSlope <- function(x) {
  exp(2.9746 + (-2.4621) * x)
}
# Predicted thrips damage on the response scale as a function of glucosinolate
# concentration x: the linear predictor is passed through exp(), the inverse
# of a log link.
# NOTE(review): the hard-coded coefficients (3.5755, -2.7869) are close to but
# not equal to the fit_g estimates printed above (3.585466, -2.793644) -
# confirm which fit they belong to.
glucSlope <- function(x) {
  exp(3.5755 + (-2.7869) * x)
}

# Determining the x range to fit each line to: an evenly spaced grid from the
# smallest to the largest observed concentration.
flavplot <- seq(min(dat$flav_Conc, na.rm = TRUE), max(dat$flav_Conc, na.rm = TRUE), length.out = 688)
glucplot <- seq(min(dat$gluc_Conc, na.rm = TRUE), max(dat$gluc_Conc, na.rm = TRUE), length.out = 708)

# Calculating the fitted values along each grid.
flavy <- flavSlope(flavplot)
glucy <- glucSlope(glucplot)

# The fitted curves are drawn with annotate("path"). Passing the grid vectors
# to geom_path() as constant x/y only works when their length happens to equal
# the number of rows in the plotted data, which tied the length.out values
# above to the current data set; annotate() accepts a grid of any length.
# NOTE(review): dat$ThripsDam was replaced by log(ThripsDam + 1) during data
# prep, while the curves are on the model's response scale - confirm that the
# points and the curves share a scale.

#tiff("Defence_Figures/FlavonoidThrips.tiff", units="in", width=10, height=6, res=300)
ggplot(dat[!is.na(dat$flav_Conc), ]) +
  geom_point(aes(y = ThripsDam, x = flav_Conc)) +
  annotate("path", x = flavplot, y = flavy, size = 1, colour = "#999999") +
  theme_simple() +
  scale_y_continuous(breaks = seq(0, 70, 5)) +
  ylab("Thrips Damage") +
  xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))

#dev.off()

ggplot(dat[!is.na(dat$gluc_Conc), ]) +
  geom_point(aes(y = ThripsDam, x = gluc_Conc)) +
  annotate("path", x = glucplot, y = glucy, size = 1, colour = "#999999") +
  theme_simple() +
  scale_y_continuous(breaks = seq(0, 70, 5)) +
  ylab("Thrips Damage") +
  xlab(bquote(bold("[Total Glucosinolate] " (mg/ml))))

I think that a logistic regression is more appropriate for fungal abundance, because the count of fungal infections could be arbitrary, especially when fungal patches were large or the leaf was completely covered.

#WhitePathDam – logistic regression

summary(fit.4) #Garlic mustard in the maple treatment has a lower occurrence of fungal colonization.
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: WhiteFungLogis ~ treatment + (1 | Family)
   Data: dat2

     AIC      BIC   logLik deviance df.resid 
   519.2    535.7   -255.6    511.2      451 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.3885 -0.6239 -0.4587  0.9947  3.3329 

Random effects:
 Groups Name        Variance Std.Dev.
 Family (Intercept) 0.544    0.7376  
Number of obs: 455, groups:  Family, 23

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept)  -0.6835     0.2395  -2.854  0.00431 **
treatmentgm  -0.4101     0.2613  -1.569  0.11656   
treatmentm   -0.8037     0.2756  -2.916  0.00355 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Correlation of Fixed Effects:
            (Intr) trtmntg
treatmentgm -0.509        
treatmentm  -0.474  0.447 

#Visualizing – effect of treatment on proportion of fungal abundance.

#Modelling: Negative Binomial – BlackPathDam

# Negative binomial GLMM of black pathogen damage on treatment and flavonoid
# concentration, with random intercepts for Family and for Tag within Family.
# The model used to be fitted twice with an identical call; one fit suffices.
# NOTE(review): dat$BlackPathDam was replaced by log(BlackPathDam + 1) during
# data prep, so the response is no longer an integer count as the nbinom2
# family assumes - confirm which scale this model should be fitted on.
fit_2 <- glmmTMB(BlackPathDam ~ treatment + flav_Conc + (1 | Family / Tag), family = nbinom2, data = dat)
summary(fit_2)
 Family: nbinom2  ( log )
Formula:          BlackPathDam ~ treatment + flav_Conc + (1 | Family/Tag)
Data: dat

     AIC      BIC   logLik deviance df.resid 
  1822.7   1854.3   -904.3   1808.7      669 

Random effects:

Conditional model:
 Groups     Name        Variance Std.Dev.
 Tag:Family (Intercept) 1.4410   1.2004  
 Family     (Intercept) 0.2517   0.5017  
Number of obs: 676, groups:  Tag:Family, 423; Family, 23

Overdispersion parameter for nbinom2 family (): 0.641 

Conditional model:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept)   0.7826     0.4876   1.605   0.1085   
treatmentgm   0.1049     0.2480   0.423   0.6724   
treatmentm   -0.5204     0.2500  -2.082   0.0374 * 
flav_Conc    -1.5060     0.5172  -2.912   0.0036 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#Visualizing: The effect of treatment and flavonoid abundance on black pathogen abundance.

#Visualization – Black pathogen damage by treatment


# Mean black pathogen damage per treatment, computed from the plant-level table
# after dropping plants with no damage value.
plot2 <- dat2 %>%
  drop_na(BlackPathDam) %>%
  group_by(treatment) %>%
  summarize(BlackPathAve = mean(BlackPathDam, na.rm = T))

# Bar chart of those treatment means; the legend is hidden because the x axis
# already names the treatments.
ggplot(plot2) +
  geom_col(aes(x = treatment, y = BlackPathAve, fill = treatment)) +
  theme_simple() +
  theme(legend.position = "none") +
  labs(x = "Treatment", y = "Black Pathogen Infection\n(spots/leaf)")

#Modelling: Negative Binomial- Fern abundance

summary(fit_1)
 Family: nbinom2  ( log )
Formula:          Fern ~ treatment + (1 | gh_bench)
Data: datFern

     AIC      BIC   logLik deviance df.resid 
   766.3    787.6   -378.2    756.3      511 

Random effects:

Conditional model:
 Groups   Name        Variance Std.Dev.
 gh_bench (Intercept) 1.803    1.343   
Number of obs: 516, groups:  gh_bench, 5

Overdispersion parameter for nbinom2 family (): 0.0905 

Conditional model:
            Estimate Std. Error z value Pr(>|z|)  
(Intercept)  -1.5550     0.7070  -2.199   0.0279 *
treatmentgm   1.1243     0.5038   2.232   0.0256 *
treatmentm    0.9924     0.4285   2.316   0.0205 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Warning messages:
1: Unknown or uninitialised column: `WhiteFungLogis`. 
2: Unknown or uninitialised column: `WhiteFungLogis`. 
3: Unknown or uninitialised column: `WhiteFungLogis`. 
4: Unknown or uninitialised column: `WhiteFungLogis`. 

#Visualizing — the distribution of pathogens by treatment.

#Visualizing – effect of flavonoids on fern abundance.


# Predicted fern abundance on the response scale: exp() of a linear predictor
# whose slope is the sum of two hard-coded coefficients and whose intercept is
# supplied by the caller.
#   x   - flavonoid concentration (vectorised)
#   int - intercept on the link scale
PoisSlope <- function(x, int) {
  combined_slope <- -0.02353 - 3.12475
  exp(combined_slope * x + int)
}
exp(-0.6571)

# Evenly spaced grid across the observed flavonoid range.
flavplot <- seq(min(dat$flav_Conc, na.rm = TRUE), max(dat$flav_Conc, na.rm = TRUE), length.out = 707)

# Fitted curves per treatment; the second argument is the intercept.
# NOTE(review): the maple intercept starts from -1.60546 while the alone and
# garlic mustard curves start from -2.1683 - confirm they come from the same fit.
flavyA <- PoisSlope(flavplot, -2.1683)
flavyM <- PoisSlope(flavplot, -1.60546 - 2.81450)
flavyGM <- PoisSlope(flavplot, -2.1683 - 0.2786)


# The maple curve is drawn with annotate("path"). Passing the grid vectors to
# geom_path() as constant x/y only works when their length happens to equal
# nrow(dat), which tied length.out above to the current data set; annotate()
# accepts a grid of any length.
#tiff("Defence_Figures/FlavonoidFern.tiff", units="in", width=10, height=6, res=300)
ggplot(dat) +
  geom_point(aes(y = Fern, x = flav_Conc, colour = treatment)) +
  theme_simple() +
  # annotate("path", x = flavplot, y = flavyA, size = 1, colour = "#009E73") +
  # annotate("path", x = flavplot, y = flavyGM, size = 1, colour = "#56B4E9") +
  annotate("path", x = flavplot, y = flavyM, size = 1, colour = "#E69F00") +
  scale_colour_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  ) +
  scale_y_continuous(breaks = c(0, 5, 10, 15, 20, 25, 30, 35, 40)) +
  ylab("Fern Abundance") +
  xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

How can we know that healthy plants don't simply produce more secondary compounds, rather than plants with more secondary compounds being healthier?

---
title: "Selective benefit of Glucosinolate and flavonoids"
output: html_notebook
---
#Part 1
The purpose of this notebook is to determine the selective benefit of glucosinolate and flavonoid compounds through plant competition by creating selection gradients. Selection gradients will involve final body mass as the proxy for fitness, but this will be replaced with fitness once the measurement is in. Concentration will be on the x-axis. This analysis will account for family and greenhouse location. 

#Part 2
The second purpose is to determine whether glucosinolates and flavonoids influence susceptibility to pathogens, and whether any change in susceptibility translates into increased fitness.




#Read in and prep data
```{r}
# Packages used throughout the notebook.
library(ggplot2)
library(lme4)
library(tidyr) # provides drop_na(), which later chunks call
library(cowplot)
library(grid)
library(gridExtra)
library(dplyr)

# Project plotting helpers (presumably where theme_simple() and friends live).
source("GGPlot_Themes.R")

# Read in data.
# The former rm(list=ls()) call was removed: it ran after source() above and
# therefore erased everything GGPlot_Themes.R had just defined. Restart the R
# session when a clean workspace is needed.
# NOTE(review): later chunks use a Family column that is not derived in this
# chunk; presumably DataSynthesis.csv already contains it - confirm.
dat <- read.csv("DataSynthesis.csv")

# Remove maple controls data from the data set.
dat <- dat %>%
  filter(treatment != "mcnt") %>%
  droplevels()

# Remove those with fertilizer treatment, and extra genotypes that are only in
# the alone treatment (rows whose Sample contains a literal "i").
dat <- dat[!grepl("i", dat$Sample, fixed = TRUE), ]

# Initializing columns to avoid constant error messages.
dat$WhiteFungLogis <- NA
dat$BlackFungLogis <- NA
```


#Generating data frames with summarized leaf data (dat2) and summarized genotype data (dat3)
```{r}

# Log-transform the leaf-level variables in place. Variables logged as
# log(x + 1) are shifted first so that zero values stay finite.
dat$Fern <- log(dat$Fern + 1)
dat$gluc_Conc <- log(dat$gluc_Conc)
dat$flav_Conc <- log(dat$flav_Conc)
dat$ChlorA <- log(dat$ChlorA)
dat$ThripsDam <- log(dat$ThripsDam + 1)
dat$BlackPathDam <- log(dat$BlackPathDam + 1)


# Leaf area is computed as leaf length times leaf width.
dat$GM_Leaf_Area <- dat$GM_Leaf_Len * dat$GM_Leaf_Wid


# Plant-level table (dat2): one row per Tag. Leaf measurements are averaged;
# plant-level attributes are taken from the first leaf row of each plant.
dat2 <- dat %>%
  group_by(Tag) %>%
  summarize(
    ChlorA = mean(ChlorA),
    ChlorB = mean(ChlorB),
    gluc_Conc = mean(gluc_Conc),
    flav_Conc = mean(flav_Conc),
    Family = first(Family),
    treatment = first(treatment),
    gh_row = first(gh_row),
    gh_bench = first(gh_bench),
    GM_TotalLeaf_Area = first(GM_TotalLeaf_Area),
    comp_number = first(comp_number),
    ThripsDam = mean(ThripsDam),
    WhiteFungDam = mean(WhiteFungDam),
    BlackPathDam = mean(BlackPathDam),
    Fern = mean(Fern),
    gh_col = first(gh_col),
    GM_Leaf_Area = mean(GM_Leaf_Area)
  )

# Plants without a final leaf-area measurement. Per the original note these 15
# genotypes died (about 3% mortality), except e|JBCHY1-1-50|Q|240, for which
# only the final measurement is missing.
dead <- dat2 %>%
  filter(is.na(GM_TotalLeaf_Area))

# Competitor numbers of the dead garlic mustard plants (the plant with the
# merely missing measurement is excluded from this list).
dead_competitors <- dead %>%
  filter(treatment == "gm", Tag != "e|JBCHY1-1-50|Q|240") %>%
  select(comp_number)

# Drop every plant and leaf row that shares a competitor number with a dead
# competitor.
dat2 <- dat2 %>%
  filter(!comp_number %in% dead_competitors$comp_number)
dat <- dat %>%
  filter(!comp_number %in% dead_competitors$comp_number)


# Family-level table (dat3): mean of every numeric column per family within
# each treatment, using only plants with a final leaf-area measurement.
dat3 <- dat2 %>%
  drop_na(GM_TotalLeaf_Area) %>%
  group_by(Family, treatment) %>%
  summarize_if(is.numeric, mean)



# White pathogen damage is heavily zero inflated, so it is recoded as
# absence (0) / presence (1) for use in a logistic regression.
dat2$WhiteFungLogis <- NA
dat2$WhiteFungLogis[dat2$WhiteFungDam == 0] <- 0
dat2$WhiteFungLogis[dat2$WhiteFungDam > 0] <- 1


  #Function to standardize variables. 
# Standardize a numeric vector to z-scores: subtract the mean and divide by the
# standard deviation, both computed with missing values removed.
#   x - numeric vector
# Returns a vector the same length as x; missing values stay missing. The
# result is returned visibly (the body used to end in an assignment, which
# made the return value invisible).
Standardize <- function(x) {
  (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}

#Standardizing leaf area. 
dat2$GM_Leaf_Area<-Standardize(dat2$GM_Leaf_Area)

```


#Searching for a fitness trade-off between the alone and interspecific competition treatment. 
```{r}
source("GGPlot_Themes.R")

# Family means within treatment, placed side by side: one row per family with
# the alone and the maple values in separate columns.
TradeOff <- dat3 %>%
  filter(treatment == "a") %>%
  drop_na(GM_TotalLeaf_Area) %>%
  select(LeafSizeAlone = GM_TotalLeaf_Area, Family, gluc_ConcAlone = gluc_Conc) %>%
  right_join(
    dat3 %>%
      filter(treatment == "m") %>%
      select(LeafSizeMaple = GM_TotalLeaf_Area, Family, gluc_ConcMaple = gluc_Conc),
    by = "Family"
  )

# Standard error of total leaf area (and sample size) per family and treatment.
StdErr <- dat2 %>%
  select(Family, treatment, GM_TotalLeaf_Area) %>%
  group_by(Family, treatment) %>%
  drop_na(GM_TotalLeaf_Area) %>%
  summarize(
    StdErr = sd(GM_TotalLeaf_Area) / sqrt(length(GM_TotalLeaf_Area)),
    size = length(GM_TotalLeaf_Area)
  )

# Same side-by-side (wide) layout for the standard errors.
StdErr2 <- StdErr %>%
  filter(treatment == "a") %>%
  select(StdErrAlone = StdErr, Family) %>%
  right_join(
    StdErr %>%
      filter(treatment == "m") %>%
      select(StdErrMaple = StdErr, Family),
    by = "Family"
  )

# Means and standard errors together; joined on the shared Family column.
TradeOff2 <- StdErr2 %>%
  left_join(TradeOff)


#tiff("Selection_Figures/TradeOff.tiff", units="in", width=10, height=6, res=300)

# Family-mean performance alone against performance with maple; the bars show
# one standard error in each direction.
ggplot(TradeOff2, aes(x = LeafSizeMaple, y = LeafSizeAlone)) +
  geom_point() +
  geom_linerange(
    aes(
      ymin = LeafSizeAlone - StdErrAlone,
      ymax = LeafSizeAlone + StdErrAlone
    )
  ) +
  geom_errorbarh(
    aes(
      xmin = LeafSizeMaple - StdErrMaple,
      xmax = LeafSizeMaple + StdErrMaple
    )
  ) +
  theme_simple() +
  labs(x = "Performance with Maple", y = "Performance Alone")
#dev.off()
# Linear regression of the alone family means on the maple family means.
summary(lm(LeafSizeAlone ~ LeafSizeMaple, data = TradeOff2))

```





#Visualizing genetic variation and greenhouse variation, which will be controlled for. 
```{r}
# Glucosinolate concentration by greenhouse bench.
ggplot(dat2) +
  geom_point(aes(x = gh_bench, y = gluc_Conc, colour = as.factor(gh_bench)))

# Glucosinolate concentration by greenhouse column, coloured by bench.
ggplot(dat2) +
  geom_point(aes(x = gh_col, y = gluc_Conc, colour = as.factor(gh_bench)))

# Family differences within each treatment, drawn in the order a, m, gm.
# Glucosinolate concentration:
invisible(lapply(c("a", "m", "gm"), function(trt) {
  boxplot(gluc_Conc ~ Family, data = dat2[dat2$treatment == trt, ])
}))
# Body mass proxy (total leaf area):
invisible(lapply(c("a", "m", "gm"), function(trt) {
  boxplot(GM_TotalLeaf_Area ~ Family, data = dat2[dat2$treatment == trt, ])
}))
```




#What influences performance? New model without gluc flav 
```{r}
library(lmerTest)

#Modelling random effects.
#All five models share the same fixed effects and differ only in the random-effect structure; they are compared pairwise with anova() below.
#fitfull: random intercepts for Family and for greenhouse bench.
fitfull<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+Fern+GM_Leaf_Area+(1|Family)+(1|gh_bench), data=dat2)

#fitfull1: Family intercept only (gh_bench removed).
fitfull1<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+Fern+GM_Leaf_Area+(1|Family), data=dat2)

#fitfull2: gh_bench intercept only (Family removed).
fitfull2<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+Fern+GM_Leaf_Area+(1|gh_bench), data=dat2)

#fitfull3: gh_bench plus a Family-by-treatment intercept, with no Family main-effect intercept.
fitfull3<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+Fern+GM_Leaf_Area+(1|gh_bench)+(1|Family:treatment), data=dat2)

#fitfull4: (1|Family/treatment) expands to (1|Family)+(1|Family:treatment), i.e. fitfull plus the Family-by-treatment intercept.
fitfull4<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+Fern+GM_Leaf_Area+(1|gh_bench)+(1|Family/treatment), data=dat2)

#Is family important? 
anova(fitfull,fitfull2) #Yes, family predicts performance. 
#Is gh bench important? 
anova(fitfull,fitfull1) #Yes, gh_bench predicts performance. 
#Is there a GxE interaction? 
#NOTE(review): fitfull and fitfull3 have the same number of variance components and neither is nested in the other, so this comparison is not a likelihood-ratio test of GxE; the fitfull vs fitfull4 comparison below is the nested one. Confirm which was intended.
anova(fitfull,fitfull3) 
anova(fitfull,fitfull4) #No there does not appear to be. 

#Modelling fixed effects. 
#Backward elimination: start from the full three-way interaction among the three damage measures and remove one interaction term at a time, testing each removal with anova().
fit<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam*WhiteFungLogis*ThripsDam+Fern+GM_Leaf_Area+(1|Family)+(1|gh_bench), data=dat2)

#Drop the three-way interaction.
fit2<-update(fit,~.-ThripsDam:BlackPathDam:WhiteFungLogis)
anova(fit,fit2) #Not a significant three way interaction.

#Drop the BlackPathDam x WhiteFungLogis interaction.
fit3<-update(fit2,~.-BlackPathDam:WhiteFungLogis)
anova(fit3,fit2) #There is not a significant two way interaction, 

#Drop the BlackPathDam x ThripsDam interaction.
fit4<-update(fit3,~.-BlackPathDam:ThripsDam)
anova(fit4,fit3)
#There is not a significant interaction between black path dam and thrips dam. 

#Drop the WhiteFungLogis x ThripsDam interaction; fit5 is then the main-effects-only model.
fit5<-update(fit4,~.-WhiteFungLogis:ThripsDam)
anova(fit5,fit4) #There is not a significant whitefung dam and thrips dam interaction. 

summary(fit5)

#Fern is not significant, so it is dropped from the fixed effects.
#The model is fitted once only: the two random intercepts (gh_bench and Family) define the same model whichever order they are written in, so a second fit with the terms swapped adds nothing.
fit6<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+GM_Leaf_Area+(1|gh_bench)+(1|Family), data=dat2)

summary(fit6) #This is a much cleaner result than using three different models for each treatment. I think instead I will use this model to assess pathogen damage, and I will determine the effect that the size of the maple has in the maple analysis. 
#Percent variance explained: 
#NOTE(review): these three values are hard-coded, presumably copied from the variance components printed by summary(fit6); they must be updated by hand if the data or model change.
337263/ ( 395515 + 5196413+337263 )

#Does fern affect performance without accounting for leaf area (it is not affected by leaf area)
#fit7: treatment and fern abundance only, with Family and gh_bench random intercepts.
fit7<-lmer(GM_TotalLeaf_Area~treatment+Fern+(1|Family)+(1|gh_bench), data=dat2)
summary(fit7) #Without accounting for leaf area, fern is a very significant predictor of performance and is correlated with reduced garlic mustard size. Whether it is doing the reduction or it can simply appear when garlic mustard is a worse performer is unclear. These estimates should be used to visualize the effect of treatment on performance. 

#To what degree does family influence performance?

#NOTE: fitfull and fitfull2 are overwritten here; from this point on they refer to the treatment+Fern models, not the random-effects models fitted earlier in this chunk.
fitfull<-lmer(GM_TotalLeaf_Area~treatment+Fern+(1|Family)+(1|gh_bench), data=dat2)
fitfull2<-lmer(GM_TotalLeaf_Area~treatment+Fern+(1|gh_bench), data=dat2)

#Compares the models with and without the Family random intercept.
anova(fitfull,fitfull2) #Family only predicts performance when pathogens are accounted for. 
```

#Model Diagnostics. 
```{r}

# Residuals vs. fitted values for fit6 and fit7 (as left by the previous
# chunk): no heteroscedasticity.
plot(fit6)
plot(fit7)
# Normal Q-Q plots of the residuals: fairly normal.
qqnorm(resid(fit6))
qqnorm(resid(fit7))
# The model appears to be a good fit.
```

#Is there a cost to glucosinolate production? 
```{r}
#--- Glucosinolates ---
#Additive model: does glucosinolate concentration predict performance once treatment, damage and leaf area are accounted for?
fit6<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+GM_Leaf_Area+gluc_Conc+(1|Family)+(1|gh_bench), data=dat2)
summary(fit6) #No, in fact glucosinolates are positively correlated with performance. 

#Is the cost dependent on the treatment? (adds the treatment x gluc_Conc interaction)
fit7<-lmer(GM_TotalLeaf_Area~treatment*gluc_Conc+BlackPathDam+WhiteFungLogis+ThripsDam+GM_Leaf_Area+(1|Family)+(1|gh_bench), data=dat2)
summary(fit7) 
anova(fit6,fit7)#There is an interaction with treatment. However, it is positive in all treatments and maple performance was not accounted for, so this model is set aside as uninformative. 



#--- Flavonoids ---
#Same additive model with flavonoid concentration in place of glucosinolates. fit6 and fit7 are overwritten from here on.
fit6<-lmer(GM_TotalLeaf_Area~treatment+BlackPathDam+WhiteFungLogis+ThripsDam+GM_Leaf_Area+flav_Conc+(1|Family)+(1|gh_bench), data=dat2)
summary(fit6) #As are flavonoids. 

#Is the cost dependent on the treatment? (adds the treatment x flav_Conc interaction)
fit7<-lmer(GM_TotalLeaf_Area~treatment*flav_Conc+BlackPathDam+WhiteFungLogis+ThripsDam+GM_Leaf_Area+(1|Family)+(1|gh_bench), data=dat2)
#Summarise the interaction model that was just fitted (fit7, matching the glucosinolate section), not the additive fit6 again.
summary(fit7) 

anova(fit6,fit7) #There is not a significant interaction with flavonoids. 

```





#Visualization -- The conditional benefit of glucosinolates (allelopathy)
```{r}

# Fitted lines for the glucosinolate figure. The coefficients are hard-coded
# (presumably copied from a treatment x gluc_Conc model summary -- confirm
# before reusing them).

# Predicted performance in the alone treatment at glucosinolate value(s) x.
aloneSlope <- function(x) {
  -3966.60 * x + 13751.96
}

# Predicted performance in the maple treatment: baseline slope and intercept
# plus the maple-specific adjustments.
mapleSlope <- function(x) {
  (-4254.2 + 8417.95) * x + 13751.96 - 9243.71
}

# Garlic mustard treatment: the slope is non-significant, so a flat line at
# the adjusted intercept is returned (a single value, whatever x is).
mustardSlope <- function(x) {
  13751.96 - 8376.67
}

# Observed glucosinolate range within each treatment; each fitted line is
# drawn only across the range of its own treatment.
minM <- min(dat2$gluc_Conc[dat2$treatment == "m"], na.rm = TRUE)
maxM <- max(dat2$gluc_Conc[dat2$treatment == "m"], na.rm = TRUE)

minA <- min(dat2$gluc_Conc[dat2$treatment == "a"], na.rm = TRUE)
maxA <- max(dat2$gluc_Conc[dat2$treatment == "a"], na.rm = TRUE)

minG <- min(dat2$gluc_Conc[dat2$treatment == "gm"], na.rm = TRUE)
maxG <- max(dat2$gluc_Conc[dat2$treatment == "gm"], na.rm = TRUE)

#tiff("Selection_Figures/Gluc_Benefit.tiff", units="in", width=10, height=6, res=300)
library(ggplot2)
# Performance against glucosinolate concentration, one fitted segment per
# treatment (alone = green, maple = orange, garlic mustard = blue).
ggplot(dat2) +
  geom_point(
    aes(y = GM_TotalLeaf_Area, x = gluc_Conc, colour = treatment),
    size = 2
  ) +
  geom_segment(
    x = minA, xend = maxA,
    y = aloneSlope(minA), yend = aloneSlope(maxA),
    colour = "#009E73", size = 1.5
  ) +
  geom_segment(
    x = minM, xend = maxM,
    y = mapleSlope(minM), yend = mapleSlope(maxM),
    colour = "#E69F00", size = 1.5
  ) +
  geom_segment(
    x = minG, xend = maxG,
    y = mustardSlope(minG), yend = mustardSlope(maxG),
    colour = "#56B4E9", size = 1.5
  ) +
  theme_simple() +
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")"))) +
  xlab(bquote(bold("[Total Glucosinolate] " (mg/ml)))) +
  scale_colour_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  )
#dev.off()

# Print the lower end of the alone-treatment range.
minA


```

#Visualizing the effect of flavonoid on fitness.
```{r}
# Performance as a function of treatment, flavonoid concentration, black
# pathogen damage and fern abundance, with Family and column-within-bench
# random intercepts.
summary(
  lmer(
    GM_TotalLeaf_Area ~ treatment + flav_Conc + BlackPathDam + Fern +
      (1 | Family) + (1 | gh_bench / gh_col),
    data = dat2
  )
)

# Fitted lines for the flavonoid figure: one common slope with a
# treatment-specific intercept. The coefficients are hard-coded (presumably
# copied from the model summary printed above -- confirm before reuse).

# Alone treatment (baseline intercept).
aloneSlope <- function(x) {
  2024.62 * x + 8035.30
}

# Maple treatment: baseline intercept shifted by the maple offset.
mapleSlope <- function(x) {
  (2024.62) * x + 8035.30 - 1067.02
}

# Garlic mustard treatment: baseline intercept shifted by the garlic mustard
# offset.
mustardSlope <- function(x) {
  2024.62 * x + 8035.30 - 3746.91
}

# Observed flavonoid range within each treatment; each fitted line is drawn
# only across the range of its own treatment.
minM <- min(dat2$flav_Conc[dat2$treatment == "m"], na.rm = TRUE)
maxM <- max(dat2$flav_Conc[dat2$treatment == "m"], na.rm = TRUE)

minA <- min(dat2$flav_Conc[dat2$treatment == "a"], na.rm = TRUE)
maxA <- max(dat2$flav_Conc[dat2$treatment == "a"], na.rm = TRUE)

minG <- min(dat2$flav_Conc[dat2$treatment == "gm"], na.rm = TRUE)
maxG <- max(dat2$flav_Conc[dat2$treatment == "gm"], na.rm = TRUE)


#tiff("Selection_Figures/Flav_Benefit.tiff", units="in", width=10, height=6, res=300)
# Performance against flavonoid concentration, one fitted segment per
# treatment (alone = green, maple = orange, garlic mustard = blue).
ggplot(dat2) +
  geom_point(
    aes(y = GM_TotalLeaf_Area, x = flav_Conc, colour = treatment),
    size = 2
  ) +
  geom_segment(
    x = minA, xend = maxA,
    y = aloneSlope(minA), yend = aloneSlope(maxA),
    colour = "#009E73", size = 1.5
  ) +
  geom_segment(
    x = minM, xend = maxM,
    y = mapleSlope(minM), yend = mapleSlope(maxM),
    colour = "#E69F00", size = 1.5
  ) +
  geom_segment(
    x = minG, xend = maxG,
    y = mustardSlope(minG), yend = mustardSlope(maxG),
    colour = "#56B4E9", size = 1.5
  ) +
  theme_simple() +
  ylab(bquote(bold("Performance\n(Total Leaf Area "~(mm^2)~")"))) +
  xlab(bquote(bold("[Total Flavonoid] " (mg/ml)))) +
  scale_colour_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  )
#dev.off()
```

#Visualization-- The Detriment of pathogens and ferns
```{r}
# Fern-only model of performance with Family and column-within-bench random
# intercepts.
summary(
  lmer(
    GM_TotalLeaf_Area ~ Fern + (1 | Family) + (1 | gh_bench / gh_col),
    data = dat2
  )
)

# Panel a: performance vs. black pathogen damage. The line uses a hard-coded
# intercept and slope (presumably from a fitted model -- confirm).
a <- ggplot(dat2) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = BlackPathDam)) +
  theme_simple_multiCol() +
  geom_abline(intercept = 8281.76, slope = -105.28, size = 1.5) +
  xlab("Black Pathogen Damage")

# Panel b: performance vs. powdery mildew damage, restricted to plants with
# WhiteFungDam below 30; no fitted line.
b <- ggplot(dat2[dat2$WhiteFungDam < 30, ]) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = WhiteFungDam), colour = "#999999") +
  theme_simple_multiCol() +
  theme(
    axis.title.x = element_text(
      color = "#999999", size = 16, face = "bold",
      margin = margin(3, 0, 3, 0)
    )
  ) +
  xlab("Powdery Mildew Damage")

# Panel c: performance vs. thrips damage; no fitted line.
c <- ggplot(dat2) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = ThripsDam), colour = "#E69F00") +
  theme_simple_multiCol() +
  xlab("Thrips Damage") +
  theme(
    axis.title.x = element_text(
      color = "#E69F00", size = 16, face = "bold",
      margin = margin(3, 0, 3, 0)
    )
  )

# Panel d: performance vs. fern abundance, with a hard-coded fitted line
# (presumably from the fern model summarised above -- confirm).
d <- ggplot(dat2) +
  geom_point(aes(y = GM_TotalLeaf_Area, x = Fern), colour = "#009E73") +
  theme_simple_multiCol() +
  xlab("Fern Abundance") +
  geom_abline(
    intercept = 8003.44, slope = -179.47, size = 1.5, color = "#009E73"
  ) +
  theme(
    axis.title.x = element_text(
      color = "#009E73", size = 16, face = "bold",
      margin = margin(3, 0, 3, 0)
    )
  )
   


# Alternative arrangements of panels a-d.
plot  <- plot_grid(a, b, ncol = 2, rel_widths = c(1, 1))

plot2 <- plot_grid(d, c, ncol = 2, rel_widths = c(1, 1))

plot3 <- plot_grid(a, b, c, d, ncol = 2, rel_widths = c(1, 1))

plot4 <- plot_grid(a, b, c, ncol = 1, rel_widths = c(1, 1, 1))
plot5 <- plot_grid(a, b, c, ncol = 3, rel_widths = c(1, 1, 1))


# Shared, rotated y-axis label placed to the left of every arrangement.
y.grob <- textGrob(
  bquote(bold("Shoot Area "(mm^2))),
  gp = gpar(fontface = "bold", fontsize = 20),
  rot = 90
)


# Draw each arrangement with the shared label. Uncomment the matching tiff()
# and dev.off() lines to write a figure to disk.
#tiff("Selection_Figures/PathogenEffect.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot, left = y.grob)
#dev.off()

#tiff("Selection_Figures/PathogenEffect2.tiff", units="in", width=14, height=6, res=300)
grid.arrange(plot2, left = y.grob)
#dev.off()

#tiff("Selection_Figures/PathogenEffect3.tiff", units="in", width=14, height=10, res=300)
grid.arrange(plot3, left = y.grob)
#dev.off()

grid.arrange(plot4, left = y.grob)


#tiff("Selection_Figures/PathogenEffect5.tiff", units="in", width=16, height=5, res=300)
grid.arrange(plot5, left = y.grob)
#dev.off()

#tiff("Selection_Figures/FernEffect.tiff", units="in", width=8, height=6, res=300)
grid.arrange(d, left = y.grob)
#dev.off()
#dev.off

```



#-----------------------------------------
#Part 2





#Influence of gluc and flav on defence. 
```{r}

#Distribution of each leaf-level response before choosing an error family.
#Note that Fern, ThripsDam and BlackPathDam were log(x+1)-transformed when the data were prepared at the top of this file, so these histograms show the transformed values.
hist(dat$Fern)
hist(dat$ThripsDam)
hist(dat$WhiteFungDam)
hist(dat$BlackPathDam)

#Print the raw vector for inspection.
dat$BlackPathDam

#This data is very zero inflated and a poisson model will be too overdispersed. A Negative binomial distribution will be used. 
```



#Modelling : Negative binomial on Thrips Damage. 
```{r}
library("glmmTMB")

#Round thrips damage up to whole numbers so it can be used as a count response.
#NOTE(review): ThripsDam was already log(x+1)-transformed when the data were prepared at the top of this file, so ceiling() rounds logged values, not raw counts -- confirm this is the intended response for a negative binomial model.
dat$ThripsDam<-ceiling(dat$ThripsDam)

#Full model: treatment and both compound concentrations; leaves (rows) are nested in plant (Tag) within Family, plus a bench intercept.
fit_full<-glmmTMB(ThripsDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)

#Test treatment by removing it.
fit_1<-update(fit_full,~.-treatment)
anova(fit_full,fit_1) #treatment is unimportant. 

#Test the gluc_Conc x flav_Conc interaction by adding it to the treatment-free model.
fit_2<-update(fit_1,~.+ gluc_Conc:flav_Conc)

anova(fit_1,fit_2)#interaction unimportant. 


#Refit on rows with a flavonoid measurement so the models with and without flav_Conc use the same rows. fit_1 and fit_2 are reused (overwritten) from here on.
fit_1<-glmmTMB(ThripsDam~gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])

fit_2<-update(fit_1,~.- flav_Conc)
anova(fit_1,fit_2)#
#Flav_Conc is highly important. 



#NOTE(review): the data here are filtered on flav_Conc only; if any of those rows lack gluc_Conc, the model without gluc_Conc is fit to more rows than fit_1 and the comparison is not like-for-like -- confirm.
fit_2<-update(fit_1,~.- gluc_Conc)
anova(fit_1,fit_2)# gluc conc does not appear to be significant. I will try using the whole gluc conc data set. 

#Glucosinolate-only model on all rows with a glucosinolate measurement.
fit_1<-glmmTMB(ThripsDam~gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])

fit_2<-update(fit_1,~.- gluc_Conc)
anova(fit_1,fit_2)# gluc conc is highly significant, however, when flavonoids are excluded. Therefore secondary compounds, both glucosinolates and flavonoids, predict a reduction in thrips damage. 

#Best model includes both; however, putting both in the same model will distribute the effect amongst both of the terms. This might not be appropriate, but I will check for this. 

#Single-compound models and the two-compound model, for comparing coefficients.
fit_g<-glmmTMB(ThripsDam~gluc_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
fit_f<-glmmTMB(ThripsDam~flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
fit_full<-glmmTMB(ThripsDam~gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)

summary(fit_f)$coef
summary(fit_g)$coef
summary(fit_full)$coef

#As expected, the coefficients are distributed evenly amongst gluc and flav concentrations; the conclusion is that both reduce thrips abundance and it is not possible to tell which does the most, however, the effect is more significant for flavonoids. 

#Residuals and squared residuals of the flavonoid-only model, plotted against row index.
plot(resid(fit_f))
plot(resid(fit_f)^2)
#The model appears to be a very good fit. 


# Permutation test for the flavonoid effect on thrips damage.
# Refit the flavonoid-only model on the full data set; it is the template
# that every permuted fit is updated from.
fit_f <- glmmTMB(
  ThripsDam ~ flav_Conc + (1 | Family / Tag) + (1 | gh_bench),
  family = nbinom2,
  data = dat
)
summary(fit_f)

datPerTest <- dat
zStore <- numeric(500)
for (i in seq_len(500)) {
  # Shuffle flavonoid concentration across rows (missing values included).
  datPerTest$flav_Conc <- sample(
    dat$flav_Conc, length(dat$flav_Conc), replace = FALSE
  )

  # Refit on the shuffled data; row 2, column 3 of the conditional
  # coefficient table is the z value of flav_Conc.
  flavZval <- summary(update(fit_f, data = datPerTest))$coef[[1]][2, 3]

  # Keep the permuted z value.
  zStore[i] <- flavZval
}

# One-sided permutation p-value: the share of permuted z values at least as
# negative as the observed one (-5.356, hard-coded from the fitted model).
sum(zStore <= -5.356) / length(zStore)
# With the permutation test the p value is still 0, i.e. none of the 500
# shuffles produced a z value as extreme as the observed one, which supports
# the flavonoid effect.
hist(zStore)

```


#Visualize: Thrips Damage predicted by glucosinolates and flavonoids

```{r}

source("GGPlot_Themes.R")
# Back-transform the linear predictor through the inverse of the log link so
# the curves are on the response (damage) scale. Coefficients are hard-coded
# (presumably taken from the fitted thrips models -- confirm before reuse).

# Expected thrips damage at flavonoid value(s) x.
flavSlope <- function(x) {
  exp(-2.4621 * x + 2.9746)
}

# Expected thrips damage at glucosinolate value(s) x.
glucSlope <- function(x) {
  exp(-2.7869 * x + 3.5755)
}

#Determining x range to fit the line to.
#geom_path() below receives x and y as fixed parameters rather than aes() mappings, and ggplot2 only accepts such vectors when they have length 1 or exactly one value per row of the plotted data. The grid length is therefore derived from the number of rows that are plotted (rows with a non-missing concentration) instead of a hard-coded count that breaks as soon as the data set changes.
flavplot<-seq(min(dat$flav_Conc,na.rm = TRUE),max(dat$flav_Conc,na.rm=TRUE),length.out = sum(!is.na(dat$flav_Conc)))
glucplot<-seq(min(dat$gluc_Conc,na.rm = TRUE),max(dat$gluc_Conc,na.rm=TRUE),length.out = sum(!is.na(dat$gluc_Conc)))

#Calculating the fitted curve on the response scale
flavy<-flavSlope(flavplot)
glucy<-glucSlope(glucplot)


#Thrips damage against flavonoid concentration with the fitted curve.
#tiff("Defence_Figures/FlavonoidThrips.tiff", units="in", width=10, height=6, res=300)
ggplot(dat[!is.na(dat$flav_Conc),])+
  geom_point(aes(y=ThripsDam,x=flav_Conc))+theme_simple()+
  geom_path(x=flavplot,y=flavy,size=1,colour="#999999")+
  
  scale_y_continuous(breaks=seq(0,70,5))+
  ylab("Thrips Damage")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

#Thrips damage against glucosinolate concentration with the fitted curve.
ggplot(dat[!is.na(dat$gluc_Conc),])+
  geom_point(aes(y=ThripsDam,x=gluc_Conc))+theme_simple()+
  geom_path(x=glucplot,y=glucy,size=1,colour="#999999")+
  
  scale_y_continuous(breaks=seq(0,70,5))+
  ylab("Thrips Damage")+
xlab(bquote(bold("[Total Glucosinolate] " (mg/ml))))

```






I think that a logistic regression is more appropriate for fungal abundance, because the count of fungal infections could be arbitrary, especially when fungal patches were large or the leaf was completely covered. 

#WhitePathDam -- logistic regression 
```{r}

#Logistic mixed models for presence/absence of powdery mildew (WhiteFungLogis) at the plant level (dat2), with a Family random intercept.
#This is the biggest model that could converge. ... interaction with flavonoid could not.
fit_full_g<-glmer(WhiteFungLogis~treatment*gluc_Conc+flav_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$flav_Conc),])

#Test flavonoid concentration by removing it; both models use only rows with a flavonoid measurement.
fit.1<-update(fit_full_g,~.-flav_Conc)
anova(fit_full_g,fit.1) #Flavonoid Concentration is not significant. 


#Testing glucosinolate treatment interaction. 
fit.2<-glmer(WhiteFungLogis~treatment*gluc_Conc+(1|Family),family=binomial,data=dat2)
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2)
anova(fit.2,fit.3) #Glucosinolate:Treatment interaction is not significant.

#Testing glucosinolate involvement at all.
#Both models are restricted to rows with a glucosinolate measurement so they are fit to the same data. fit.3 and fit.4 are overwritten here.
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
fit.3<-glmer(WhiteFungLogis~treatment+gluc_Conc+(1|Family),family=binomial,data=dat2[!is.na(dat2$gluc_Conc),])
anova(fit.4,fit.3) #Glucosinolate Concentration is not a significant predictor

#Testing effect of treatment
#fit.4 is refit on all of dat2; this is the model used for the summary, the diagnostic plot and the permutation test that follow.
fit.4<-glmer(WhiteFungLogis~treatment+(1|Family),family=binomial,data=dat2)
fit.5<-glmer(WhiteFungLogis~1+(1|Family),family=binomial,data=dat2)
anova(fit.4,fit.5)
#treatment is significant......

summary(fit.4) #Garlic mustard in the maple treatment have less occurrence of fungal colonization. 

plot(fit.4)

# Permutation test for the treatment effect on powdery mildew presence.
datPerTest <- dat2
treatzStore <- numeric(2000)
for (i in seq_len(2000)) {
  # Shuffle the treatment labels across plants.
  datPerTest$treatment <- sample(
    dat2$treatment, length(dat2$treatment), replace = FALSE
  )

  # Refit fit.4 on the shuffled data; row 3, column 3 of the coefficient
  # table is the z value of the third coefficient (presumably the maple
  # treatment -- confirm against summary(fit.4)).
  treatZval <- summary(update(fit.4, data = datPerTest))$coef[3, 3]

  # Keep the permuted z value.
  treatzStore[i] <- treatZval
}
# One-sided permutation p-value against the observed z value (-2.916,
# hard-coded from summary(fit.4)).
sum(treatzStore <= -2.916) / length(treatzStore)
# Simulated p value is about 0.0025, which is very similar to the 0.003 I
# observed, so the permutation test agrees with the model-based test.
```



#Visualizing -- effect of treatment on proportion of fungal abundance. 
```{r}
# Summarizing for display: percentage of plants with powdery mildew in each
# treatment (plants without a WhiteFungLogis value are dropped first).
plot <- dat2 %>%
  drop_na(WhiteFungLogis) %>%
  group_by(treatment) %>%
  summarize(
    PercWhitFung = sum(WhiteFungLogis) / length(WhiteFungLogis) * 100
  )


#tiff("Defence_Figures/TreatMeanWhiteFung.tiff", units="in", width=8, height=5, res=300)
# Bar chart of infection frequency by treatment.
ggplot(plot) +
  geom_col(aes(x = treatment, y = PercWhitFung, fill = treatment)) +
  theme_simple() +
  ylab("Fungal Abundance\n (% Infected)") +
  scale_y_continuous(breaks = seq(0, 30, 5)) +
  scale_x_discrete(name = "", labels = c("Alone", "Garlic Mustard", "Maple")) +
  scale_fill_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  ) +
  theme_simple_multiCol() +
  theme(
    axis.title.y = element_text(
      color = "black", size = 16, face = "bold",
      margin = margin(3, 20, 3, 0)
    )
  )
#dev.off()

```



#Modelling: Negative Binomial -- BlackPathDam 
```{r}
#Round black pathogen damage up to whole numbers so it can be used as a count response.
#NOTE(review): BlackPathDam was already log(x+1)-transformed when the data were prepared at the top of this file, so ceiling() rounds logged values, not raw counts -- confirm this is intended.
dat$BlackPathDam<-ceiling(dat$BlackPathDam)
#Full model with and without the greenhouse bench random intercept.
fit_full<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag)+(1|gh_bench),family=nbinom2,data=dat)
fit_full_0.1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)

anova(fit_full,fit_full_0.1)#gh_Bench was not an important random effect in this model. 

#Test flavonoid concentration; both models are restricted to rows with a flavonoid measurement.
fit_full_0.1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
fit_1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$flav_Conc),])
anova(fit_full_0.1,fit_1) #Flav conc is a very important predictor of black pathogen damage. 

#Test glucosinolate concentration; both models are restricted to rows with a glucosinolate measurement.
fit_1<-glmmTMB(BlackPathDam~treatment+gluc_Conc+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
fit_2<-glmmTMB(BlackPathDam~treatment+(1|Family/Tag),family=nbinom2,data=dat[!is.na(dat$gluc_Conc),])
anova(fit_1,fit_2) #Glucosinolates are not a significant predictor at all. 


#Test treatment in the presence of flavonoids. fit_2 is overwritten and becomes the final model used below.
fit_2<-glmmTMB(BlackPathDam~treatment+flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)
fit_3<-glmmTMB(BlackPathDam~flav_Conc+(1|Family/Tag),family=nbinom2,data=dat)
anova(fit_3,fit_2) #Treatment is a significant predictor of black pathogen damage. 


#Therefore, the best model is one with flavonoids and treatment. 

summary(fit_2)

#Residuals of the final model plotted against row index.
plot(resid(fit_2)) #Model fits well.

# Permutation test for the flavonoid and treatment effects on black pathogen
# damage, using fit_2 (treatment + flav_Conc) as the template.

datPerTest <- dat
zStoreflav <- numeric(500)
zStoretreat <- numeric(500)
for (i in seq_len(500)) {
  # Shuffle flavonoid concentration and treatment labels, each independently
  # of the other.
  datPerTest$flav_Conc <- sample(
    dat$flav_Conc, length(dat$flav_Conc), replace = FALSE
  )
  datPerTest$treatment <- sample(
    dat$treatment, length(dat$treatment), replace = FALSE
  )

  # Refit the model on the shuffled data.
  newMod <- update(fit_2, data = datPerTest)

  # z values from the conditional coefficient table: row 4 is flav_Conc and
  # row 3 is the third coefficient (presumably the maple treatment -- confirm
  # against summary(fit_2)).
  permCoefs <- summary(newMod)$coef[[1]]
  flavZval <- permCoefs[4, 3]
  treatZval <- permCoefs[3, 3]

  # Keep the permuted z values.
  zStoreflav[i] <- flavZval
  zStoretreat[i] <- treatZval
}

# One-sided permutation p-values against the observed z values (hard-coded
# from summary(fit_2)).
# treatment p value
sum(zStoretreat <= -2.082) / length(zStoretreat)
# The p value for treatment is 0.022, which is very close to the observed
# 0.03 p value.
sum(zStoreflav <= -2.8863) / length(zStoreflav)
# The p value of flavonoids is 0.002, which is very close to the observed
# 0.003 p value.
# Conclusion: the permutation results agree with the model-based tests.
```


#Visualizing: The effect of treatment and flavonoid abundance on black pathogen abundance. 
```{r}
source("GGPlot_Themes.R")
# Back-transform through the inverse of the log link: expected black pathogen
# damage at flavonoid value(s) x for a treatment-specific intercept `int`.
# The slope is hard-coded (presumably from the fitted model -- confirm).
flavSlope <- function(x, int) {
  exp(-1.5060 * x + int)
}



#Determining x range to fit the line to.
#geom_path() below receives x and y as fixed parameters rather than aes() mappings, and ggplot2 only accepts such vectors when they have length 1 or exactly one value per row of the plotted data. The grid length is therefore derived from the number of plotted rows (rows with a flavonoid measurement) instead of a hard-coded count.
flavplot<-seq(min(dat$flav_Conc,na.rm = TRUE),max(dat$flav_Conc,na.rm=TRUE),length.out = sum(!is.na(dat$flav_Conc)))


#Calculating the fitted curves on the response scale: alone is the baseline intercept, the other two add their treatment offsets (values hard-coded, presumably from the fitted model -- confirm).
flavyA<-flavSlope(flavplot, 0.7826)
flavyGM<-flavSlope(flavplot, 0.7826+0.1049)
flavyM<-flavSlope(flavplot, 0.7826-0.5204 )


#NOTE(review): the commented-out file name below is the same one used for the thrips figure; choose a distinct name before enabling it, or that figure will be overwritten.
#tiff("Defence_Figures/FlavonoidThrips.tiff", units="in", width=10, height=6, res=300)
ggplot(dat[!is.na(dat$flav_Conc),])+
  geom_point(aes(y=BlackPathDam,x=flav_Conc,colour=treatment))+
theme_simple()+
  geom_path(x=flavplot,y=flavyA,size=1,colour="#009E73")+
    geom_path(x=flavplot,y=flavyGM,size=1,colour="#56B4E9")+
  geom_path(x=flavplot,y=flavyM,size=1,colour="#E69F00")+

  scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+

  scale_y_continuous(breaks=seq(0,70,5))+
  ylab("Black Pathogen Damage")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
```


#Visualization -- Black pathogen damage by treatment
```{r}

# Mean black pathogen damage per treatment, from the plant-level data.
plot2 <- dat2 %>%
  drop_na(BlackPathDam) %>%
  group_by(treatment) %>%
  summarize(BlackPathAve = mean(BlackPathDam, na.rm = TRUE))

# Bar chart of the treatment means; the legend is hidden because the x axis
# already identifies the treatments.
ggplot(plot2) +
  geom_col(aes(x = treatment, y = BlackPathAve, fill = treatment)) +
  theme_simple() +
  ylab("Black Pathogen Infection\n(spots/leaf)") +
  xlab("Treatment") +
  theme(legend.position = "none")

```




#Modelling: Negative Binomial- Fern abundance
```{r}
# Fern abundance is a pot-level observation, and the garlic mustard treatment
# has two garlic mustard individuals per pot, so plants that share a pot are
# not independent replicates for this response.
#
# To mark pot membership, every plant that has a competition number gets that
# number as its Tag, so pot-mates share one Tag value.
# NOTE(review): this only relabels Tag; rows are not averaged or removed here.
#
# dat2 (plant-level summary) is used because there is a single fern
# observation per pot, not per leaf.
datFern <- dat2
datFern$Tag <- as.character(datFern$Tag)
hasCompNumber <- !is.na(datFern$comp_number)
datFern$Tag[hasCompNumber] <- datFern$comp_number[hasCompNumber]
datFern
#I am excluding flavonoid concentration because there were no detectable flavonoids in the root samples. 
#NOTE(review): none of the models below include Tag, so the pot relabelling stored in datFern$Tag does not enter the analysis -- confirm whether a pot-level term or pot-level averaging was intended.
#NOTE(review): Fern was log(x+1)-transformed and then averaged per plant when dat2 was built at the top of this file, so the response is generally not an integer count -- confirm it suits nbinom2.
#Full model and the two models that each drop one random intercept.
fit_full<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family)+(1|gh_bench),family=nbinom2,data=datFern)
fit_full_0.1<-glmmTMB(Fern~treatment+gluc_Conc+(1|Family),family=nbinom2,data=datFern)
fit_full_0.2<-glmmTMB(Fern~treatment+gluc_Conc+(1|gh_bench),family=nbinom2,data=datFern)

anova(fit_full,fit_full_0.1)#Bench is an extremely important predictor. 
anova(fit_full,fit_full_0.2)#Family is not. 


#Test glucosinolate concentration; both models are restricted to rows with a glucosinolate measurement. fit_full is overwritten here.
fit_full<-glmmTMB(Fern~treatment+gluc_Conc+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
fit_1<-glmmTMB(Fern~treatment+(1|gh_bench),family=nbinom2,data=datFern[!is.na(datFern$gluc_Conc),])
anova(fit_full,fit_1)#Glucosinolates are not a significant predictor of fern abundance (although the AIC is about the same)


#Test treatment on the full datFern. fit_1 becomes the final model used by the permutation test.
fit_1<-glmmTMB(Fern~treatment+(1|gh_bench),family=nbinom2,data=datFern)
fit_2<-glmmTMB(Fern~1+(1|gh_bench),family=nbinom2,data=datFern)
anova(fit_2,fit_1)
#Fern abundance is predicted by treatment. 

#Coefficient table, then residuals and squared residuals against row index.
summary(fit_1)$coef
plot(resid(fit_1))
plot(resid(fit_1)^2)

summary(fit_1)


#Permutation test for the treatment effects on fern abundance, using fit_1 (Fern ~ treatment + (1|gh_bench)) as the template.
datPerTest<-datFern
#Preallocate the stores instead of growing them inside the loop.
zStoretreatGM<-numeric(500)
zStoretreatM<-numeric(500)

for(i in seq_len(500)){
  #Randomize the treatment labels (each pass reshuffles the previous shuffle, which is still a random permutation).
  datPerTest$treatment<-sample(datPerTest$treatment,length(datPerTest$treatment),replace = FALSE)

  #New model with randomized treatment
  newMod<-update(fit_1,data=datPerTest)
  
  #Conditional-model coefficient table: rows are (Intercept), garlic mustard, maple (treatment levels a, gm, m -- confirm against summary(fit_1)); column 3 is the z value and column 2 the standard error.
  permCoefs<-summary(newMod)$coefficients$cond

  # Extract test statistics for maple (M) and garlic mustard (gm) treatments.
  #The garlic mustard statistic must be read from row 2, column 3; reading [3,2] would return the standard error of the maple coefficient instead of a z value.
  MtreatZval<-permCoefs[3,3]
  GMtreatZval<-permCoefs[2,3]
  
  #Store z value.
  zStoretreatGM[i]<-GMtreatZval
  zStoretreatM[i]<-MtreatZval
}

#One-sided permutation p-values against the observed z values (hard-coded from summary(fit_1)).
sum(zStoretreatM>=2.316180)/length(zStoretreatM)
#Estimated p value for the maple treatment is 0.01. This is very close to the actual p value of 0.02
sum(zStoretreatGM>=2.231574)/length(zStoretreatGM)
#NOTE(review): the previously recorded estimate of 0 for the garlic mustard treatment was computed from standard errors rather than z values; re-run this chunk and record the new estimate next to the model-based p value of 0.02.

summary(fit_1)
```








#Visualizing --- the distribution of pathogens by treatment. 
```{r}


# Mean fern abundance per treatment, from the plant-level data.
plot4 <- dat2 %>%
  drop_na(Fern) %>%
  group_by(treatment) %>%
  summarize(Fern = mean(Fern, na.rm = TRUE))


# Number of leaf-level rows per competition number.
table(dat$comp_number)

# Bar chart of mean fern abundance by treatment.
ggplot(plot4) +
  geom_col(aes(x = treatment, y = Fern, fill = treatment)) +
  theme_simple() +
  ylab("Average Fern Abundance\n(ferns/pot)") +
  xlab("Treatment") +
  theme(legend.position = "none") +
  scale_x_discrete(name = "", labels = c("Alone", "Garlic Mustard", "Maple")) +
  scale_fill_manual(
    values = c("#009E73", "#56B4E9", "#E69F00"),
    labels = c("Alone", "Garlic Mustard", "Maple")
  ) +
  theme_simple_multiCol() +
  theme(
    axis.title.y = element_text(
      color = "black", size = 16, face = "bold",
      margin = margin(3, 20, 3, 0)
    )
  )
#dev.off()

```














#Visualizing -- effect of flavonoids on fern abundance. 
```{r}

# Back-transform through the inverse of the log link: expected fern abundance
# at flavonoid value(s) x for intercept `int`. The slope is the sum of two
# hard-coded coefficients (presumably a baseline slope plus an interaction
# term from a fitted model -- confirm before reuse).
PoisSlope <- function(x, int) {
  exp((-0.02353 - 3.12475) * x + int)
}
#Back-transformed value of a single coefficient, printed for reference.
exp(-0.6571)

#x grid for the fitted curves. geom_path() below receives x and y as fixed parameters rather than aes() mappings, and ggplot2 only accepts such vectors when they have length 1 or exactly one value per row of the plotted data. The plot uses all of dat, so the grid length is nrow(dat) instead of a hard-coded count.
flavplot<-seq(min(dat$flav_Conc,na.rm = TRUE),max(dat$flav_Conc,na.rm=TRUE),length.out = nrow(dat))

#Fitted curves per treatment (intercepts hard-coded, presumably from a fitted model -- confirm). Only the maple curve is drawn below; the other two are kept for the commented-out layers.
flavyA<-PoisSlope(flavplot,-2.1683)
flavyM<-PoisSlope(flavplot,-1.60546-2.81450)
flavyGM<-PoisSlope(flavplot,-2.1683-0.2786)


#tiff("Defence_Figures/FlavonoidFern.tiff", units="in", width=10, height=6, res=300)
ggplot(dat)+
  geom_point(aes(y=Fern,x=flav_Conc,colour=treatment))+theme_simple()+
 # geom_path(x=flavplot,y=flavyA,size=1,colour="#009E73")
  #geom_path(x=flavplot,y=flavyGM,size=1,colour="#56B4E9")
  geom_path(x=flavplot,y=flavyM,size=1,colour="#E69F00")+
      scale_colour_manual(values=c("#009E73","#56B4E9","#E69F00"),labels=c("Alone","Garlic Mustard","Maple"))+
  scale_y_continuous(breaks=c(0,5,10,15,20,25,30,35,40))+
  ylab("Fern Abundance")+
xlab(bquote(bold("[Total Flavonoid] " (mg/ml))))
#dev.off()

```

How can we know whether plants with more secondary compounds are healthier because of those compounds, rather than healthy plants simply producing more secondary compounds?





